#########
## Nature Human Behaviour 
## Leading Countries in Global Science Increasingly Receive More Citations than Other Countries Despite Doing Similar Research.
## https://doi.org/10.1038/s41562-022-01351-5
## Harvard Dataverse (Code and Metadata): https://doi.org/10.7910/DVN/WCOINR 
## Supplemental Information Figures for Figure 2
## Data: Data_20210905
## Data: Data_20210528 (Only for English and Non-English Comparison)
#########

#.rs.restartR()
gc(reset=T)
rm(list = ls(all.names = TRUE))

#### Libraries ----------------
library("dplyr")
library("ggplot2")
library("scales")

#### Set up Parameters -----------
minimum_year <- 1980
maximum_year <- 2017
Percentile_Cutoff_Criterion_ <- 0
Citation_Window_Criterion_ <- 5
Inflation_Criterion_ <- "Field_Deflated"
Journal_Censoring_Criterion_ <- "Since_1980"
Sample_Core_Countries_ <- c("United States","China","Germany","United Kingdom","Japan","Netherlands","Switzerland")

#### Functions ----------------
Grand_SE <- function(x){return(sqrt(sum(x^2,na.rm=T)/length(x)^2))} #https://stats.stackexchange.com/questions/21104/calculate-average-of-a-set-numbers-with-reported-standard-errors

mean_center <- function(x){return(( (x-mean(x,na.rm=T))/sd(x,na.rm=T)))}

standard_error <- function(x){return(sd(x,na.rm=T)/sqrt(length(x)))}

##### MAG MetaData Information ----------
MAG_Field_ID_df <- read.csv("INPUT_MAG_FieldIDs_Domain.csv") %>%
  dplyr::mutate(Domain = case_when(Domain=="Medicine" ~ "Biomedical, Behavioral,\nand Ecological Sciences",
                                   Domain=="Engineering and Computer Science" ~ "Engineering and\nComputational Sciences",
                                   Domain=="Social Sciences" ~ "Social\nSciences",
                                   Domain=="Life, Behavioral, and Ecological Sciences" ~ "Biomedical, Behavioral,\nand Ecological Sciences",
                                   Domain=="Physical and Mathematical Sciences" ~ "Physical and\nMathematical Sciences",
                                   TRUE ~ as.character(Domain))) %>%
  as.data.frame()


##############################
##### Region DF
##############################

Regional_df <- read.csv("INPUT_R_Nation_to_Region_Core.csv") %>%
  dplyr::mutate(Region = case_when(Nation=="PUERTO_RICO" ~ "LATIN_AND_SOUTH_AMERICA",
                                   Nation=="CUBA" ~ "LATIN_AND_SOUTH_AMERICA",
                                   Nation=="GREENLAND" ~ "WESTERN_EUROPE",
                                   Nation=="FRENCH_POLYNESIA" ~ "OCEANIA",
                                   Nation=="BRUNEI_DARUSSALAM" ~ "SOUTH_EAST_ASIA",
                                   Nation=="COMOROS" ~ "SUB_AFRICA",
                                   Nation=="CABO_VERDE" ~ "SUB_AFRICA",
                                   Nation=="KIRIBATI" ~ "OCEANIA",
                                   Nation=="LAO_PDR" ~ "SOUTH_EAST_ASIA",
                                   Nation=="MOLDOVA" ~ "EASTERN_EUROPE_USSR",
                                   Nation=="MAURITANIA" ~ "MENA",
                                   Nation=="NEW_CALEDONIA" ~ "OCEANIA",
                                   Nation=="SAN_MARINO" ~ "WESTERN_EUROPE",
                                   Nation=="SAO_TOME_AND_PRINCIPE" ~ "SUB_AFRICA",
                                   Nation=="TUVALU" ~ "OCEANIA",
                                   Nation=="ST_VINCENT_AND_THE_GRENADINES" ~ "CARIBBEAN",
                                   Nation=="MAURITIUS" ~ "SUB_AFRICA",
                                   Nation=="RUSSIA" ~ "CENTRAL_ASIA",
                                   TRUE ~ as.character(Region))) %>%
  dplyr::mutate(Region = case_when(
    Region=="CARIBBEAN" ~ "Latin America\nand Caribbean",
    Region=="WESTERN_EUROPE" ~ "Europe",
    Region=="SUB_AFRICA" ~ "MENA and\nSub-Saharan Africa",
    Region=="EASTERN_EUROPE_USSR" ~ "Europe",
    Region=="EAST_ASIA" ~ "Asia",
    Region=="CENTRAL_ASIA" ~ "Asia",
    Region=="SOUTH_EAST_ASIA" ~ "Asia",
    Region=="LATIN_AND_SOUTH_AMERICA" ~ "Latin America\nand Caribbean",
    Region=="NORTH_AMERICA" ~ "US and Canada",
    Region=="MENA" ~ "MENA and\nSub-Saharan Africa",
    Region=="OCEANIA" ~ "Oceania"
  ))%>%
  as.data.frame() %>%
  dplyr::select("Nation","Region") 

#######################################
###### Supplemental Information

###### Coherence Cutoffs
###### A). Number of Countries 2A by Cutoffs Coherence (Deflated,Window Five Years Citations,English Only)
###### B). Beta Coefficients 2B by Cutoffs Coherence (Deflated,Window Five Years Citations,English Only)
###### C). Domain Beta Coefficients 2C by Cutoff Coherence (Deflated,Window Five Years Citations,English Only)

###### Deflated vs Inflated
###### A). Beta Coefficients 2B by Deflated vs. Standard (Cutoff=0, Window Five Years Citations,English Only)
###### B). Domain Beta Coefficients 2C by  Deflated vs. Standard  (Cutoff=0,Window Five Years Citations,English Only)

###### Core and All without Core (Periphery)
###### A). Beta Coefficients 2B with Core and Periphery (Cutoff=0, Window Five Years Citations,English Only)
###### B). Domain Beta Coefficients 2C with Core and Periphery  (Cutoff=0,Window Five Years Citations,English Only)

###### English versus Non-English 
###### A). Beta Coefficients 2B with English and Non-English (Cutoff=0, Deflated, Window Five Years Citations)
###### B). Domain Beta Coefficients 2C with English and Non-English  (Cutoff=0,Window Five Years Citations)

###### Core Journals versus Non-Core
###### A). Beta Coefficients 2B with Core / Non-Core Journals (Cutoff=0, Deflated, Window Five Years Citations,English Only)
###### B). Domain Beta Coefficients 2C with Core / Non-Core Journals  (Cutoff=0,Window Five Years Citations,English Only)

#######################################

Beta_df <- plyr::ldply(list.files(pattern="OUTPUT_Python_Field_MRQAP_Beta_KLD_Citation_Corpus_RAKE_and_GoogleAPI_EnglishOnly_*",
                                  full.names=T), 
                       read.csv) %>%
  subset(Stars!="" & 
           Year>=minimum_year & 
           Year<=(maximum_year-Citation_Window_Criterion_)) %>%
  merge(.,subset(MAG_Field_ID_df,select=c("FieldID","Name")),by.x=c("Discipline"),by.y=c("FieldID"),how="left") %>%
  subset(Coefficients!="Intercept") %>%
  as.data.frame() %>%
  subset(Coefficients%in%c("Citations_ji"))%>%
  subset(Percentile_Cutoff %in% c(0,0.25,0.5,0.75)) %>%
  tidyr::drop_na()

KLD_Degree_Centrality_df <- plyr::ldply(list.files(pattern="OUTPUT_Python_Field_Degree_Centrality_KLD_Citation_Corpus_RAKE_and_GoogleAPI_EnglishOnly_*",
                                                   full.names=T), 
                                        read.csv)

# Coherence Cutoffs ---------------------------------

# Coherence Cutoffs | Num Countries =================================

num_Countries_df_SM_Coherence <- KLD_Degree_Centrality_df %>%
  dplyr::select(Discipline,Year,Percentile_Cutoff,Citation_Type,Citation_Window,Journal_Censoring,Country)%>%
  dplyr::group_by(Discipline,Year,Percentile_Cutoff,Citation_Type,Citation_Window,Journal_Censoring)%>%
  dplyr::summarise(Number_of_Countries=n())%>%
  subset(Year>=minimum_year & Year<=maximum_year-Citation_Window_Criterion_) %>%
  tidyr::drop_na()%>%
  as.data.frame()

num_Countries_df_DOMAIN_SM_Coherence <- KLD_Degree_Centrality_df %>%
  dplyr::group_by(Discipline,Year,Percentile_Cutoff,Citation_Type,Citation_Window,Journal_Censoring)%>%
  dplyr::summarise(Number_of_Countries=n())%>%  
  merge(.,MAG_Field_ID_df %>%
          dplyr::rename(Discipline=FieldID) %>%
          dplyr::select(Discipline,Domain),
        by=c("Discipline")) %>%
  dplyr::group_by(Domain,Percentile_Cutoff,Citation_Type,Citation_Window,Journal_Censoring,Year) %>%
  dplyr::summarise(Number_of_Countries_Mean = mean(`Number_of_Countries`,na.rm=T),
                   Number_of_Countries_SE = sd(`Number_of_Countries`,na.rm=T)/sqrt(length(`Number_of_Countries`))) %>%
  subset(Year>=minimum_year & Year<=maximum_year-Citation_Window_Criterion_) %>%
  as.data.frame() %>%
  dplyr::rename(Number_of_Countries=Number_of_Countries_Mean)%>%
  tidyr::drop_na()%>%
  dplyr::mutate(Percentile_Cutoff_Labels = case_when(Percentile_Cutoff==0 ~ "All Inclusive",
                                                     Percentile_Cutoff==0.25 ~ "Above 25th\nPercentile",
                                                     Percentile_Cutoff==0.5 ~ "Above 50th\nPercentile",
                                                     Percentile_Cutoff==0.75 ~ "Above 75th\nPercentile"))


num_Countries_df_Percentile_SM_Coherence <- num_Countries_df_SM_Coherence %>%
  subset(Percentile_Cutoff!=Percentile_Cutoff_Criterion_) %>%
  subset(select=-c(Percentile_Cutoff)) %>%
  subset(Citation_Type==Inflation_Criterion_) %>%
  subset(Citation_Window==Citation_Window_Criterion_)%>%
  subset(Journal_Censoring==Journal_Censoring_Criterion_)%>%
  as.data.frame()

num_Countries_df_DOMAIN_Percentile_SM_Coherence <- num_Countries_df_DOMAIN_SM_Coherence %>%
  subset(Percentile_Cutoff!=Percentile_Cutoff_Criterion_) %>%
  subset(select=-c(Percentile_Cutoff)) %>%
  subset(Citation_Type==Inflation_Criterion_) %>%
  subset(Citation_Window==Citation_Window_Criterion_)%>%
  subset(Journal_Censoring==Journal_Censoring_Criterion_)%>%
  as.data.frame()

### Coherence Cutoffs | Num Countries | Plot ############################# 

SM_Coherence_Cutoffs_Num_Countries <- ggplot(data=num_Countries_df_Percentile_SM_Coherence,
                                  aes(x=Year,y=Number_of_Countries))+
  # Example Domains
  geom_point(data=num_Countries_df_DOMAIN_Percentile_SM_Coherence,aes(colour=Domain))+
  geom_line(data=num_Countries_df_DOMAIN_Percentile_SM_Coherence,aes(colour=Domain))+
  geom_ribbon(data=num_Countries_df_DOMAIN_Percentile_SM_Coherence,
              aes(colour=Domain,
                  fill=Domain,
                  ymax=Number_of_Countries+Number_of_Countries_SE,
                  ymin=Number_of_Countries-Number_of_Countries_SE),
              alpha=0.2)+
  # ggrepel::geom_label_repel(data=subset(num_Countries_df_DOMAIN_Percentile_SM_Coherence,
  #                                       Year==2012),
  #                           aes(fill=Domain,label=Domain,size=5),
  #                           fontface = 'bold', color = 'white',show.legend = F)+
  #stat_summary(fun.data = "mean_se", colour = "red", size = 1, aes(group=Year)) + 
  xlab("Years")+
  ylab("Number of Countries")+
  theme_bw()+
  theme(text = element_text(size=16),
        panel.border = element_blank(), 
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black")) +
  theme(legend.text=element_text(size=8))+
  theme(legend.position="bottom")+
  theme(strip.background = element_blank(), strip.placement = "outside")+
  theme(legend.title=element_blank())+
  theme(legend.background = element_rect(colour = "black"))+
  ggsci::scale_color_npg()+
  ggsci::scale_fill_npg()+
  facet_wrap(~Percentile_Cutoff_Labels)


# Coherence Cutoffs | Beta =================================

Beta_df_Trends_SM_Coherence <- Beta_df %>%
  dplyr::group_by(Coefficients,Year,Country_Inclusion_Criteria,Percentile_Cutoff,Citation_Type,Citation_Window,Journal_Censoring,Model_Type) %>%
  dplyr::summarise(Beta_Mean = mean(Beta,na.rm=T),
                   Beta_SE = Grand_SE(Beta/Test_Statistics)) %>%
  as.data.frame() %>%
  dplyr::rename("Beta"="Beta_Mean") %>%
  subset(Coefficients!="Intercept") %>%
  dplyr::mutate(Percentile_Cutoff_Labels = case_when(Percentile_Cutoff==0 ~ "All Inclusive",
                                                     Percentile_Cutoff==0.25 ~ "Above 25th\nPercentile",
                                                     Percentile_Cutoff==0.5 ~ "Above 50th\nPercentile",
                                                     Percentile_Cutoff==0.75 ~ "Above 75th\nPercentile"))


Beta_df_Percentile_SM_Coherence <- Beta_df %>%
  subset(Percentile_Cutoff!=Percentile_Cutoff_Criterion_) %>%
  subset(select=-c(Percentile_Cutoff)) %>%
  subset(Citation_Type==Inflation_Criterion_) %>%
  subset(Citation_Window==Citation_Window_Criterion_)%>%
  subset(Journal_Censoring==Journal_Censoring_Criterion_)%>%
  subset(Model_Type=="Citations_ji_and_NumDiffijPapers")%>%
  subset(Country_Inclusion_Criteria!='All_wo_Core')%>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = factor(Country_Inclusion_Criteria,levels=c("Core+Periphery","Core")))%>%
  as.data.frame()


Beta_df_Trends_Percentile_SM_Coherence <- Beta_df_Trends_SM_Coherence %>%
  subset(Percentile_Cutoff!=Percentile_Cutoff_Criterion_) %>%
  subset(select=-c(Percentile_Cutoff)) %>%
  subset(Citation_Type==Inflation_Criterion_) %>%
  subset(Citation_Window==Citation_Window_Criterion_)%>%
  subset(Journal_Censoring==Journal_Censoring_Criterion_)%>%
  subset(Model_Type=="Citations_ji_and_NumDiffijPapers")%>%
  subset(Country_Inclusion_Criteria!='All_wo_Core')%>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = factor(Country_Inclusion_Criteria,levels=c("Core+Periphery","Core")))%>%
  as.data.frame()

### Coherence Cutoffs | Beta | Plot ############################# 

SM_Coherence_Cutoffs_Beta_Coefficients <- ggplot(data=Beta_df_Percentile_SM_Coherence,
                                   aes(x=Year,
                                       y=Beta,
                                       colour=Country_Inclusion_Criteria))+

  #### Grand Trends
  geom_point(data=Beta_df_Trends_Percentile_SM_Coherence,
             aes(group=Country_Inclusion_Criteria))+
  geom_line(data=Beta_df_Trends_Percentile_SM_Coherence,
            aes(group=Country_Inclusion_Criteria))+
  geom_ribbon(data=Beta_df_Trends_Percentile_SM_Coherence,
              aes(ymin=Beta-Beta_SE,
                  ymax=Beta+Beta_SE,
                  colour=Country_Inclusion_Criteria,
                  fill=Country_Inclusion_Criteria),
              alpha=0.2)+
  # ggrepel::geom_label_repel(data=subset(Beta_df_Trends_Percentile_SM_Coherence,
  #                                       Year==2010),
  #                           aes(fill=Country_Inclusion_Criteria,label=Country_Inclusion_Criteria,size=5),
  #                           fontface = 'bold', color = 'white',show.legend = F)+
  theme_bw() + 
  #geom_vline(xintercept = 2005,color="red",linetype="dashed")+
  ylab("Beta Coefficient\n(Standard Deviations)")+
  xlab("Years")+
  #ylim(c(0.15,0.35))+
  theme(text = element_text(size=16),
        panel.border = element_blank(), 
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black")) +
  theme(legend.position="bottom")+
  theme(strip.background = element_blank(), strip.placement = "outside")+
  theme(legend.title=element_blank())+
  theme(legend.background = element_rect(colour = "black"))+
  ggtitle("")+
  theme(plot.title = element_text(hjust = 0.5))+
  ggsci::scale_color_npg()+
  ggsci::scale_fill_npg()+
  facet_wrap(~Percentile_Cutoff_Labels)


# Coherence Cutoffs | Beta Domain =================================

Beta_df_DOMAIN <- plyr::ldply(list.files(pattern="OUTPUT_Python_Field_MRQAP_Beta_KLD_Citation_Corpus_RAKE_and_GoogleAPI_EnglishOnly_*",
                                         full.names=T), 
                              read.csv)%>%
  subset(Stars!="" & 
           Year>=minimum_year & 
           Year<=(maximum_year-Citation_Window_Criterion_)) %>%
  merge(.,subset(MAG_Field_ID_df,select=c("FieldID","Domain")),by.x=c("Discipline"),by.y=c("FieldID"),how="left") %>%
  subset(Coefficients!="Intercept") %>%
  as.data.frame() %>%
  subset(Coefficients%in%c("Citations_ji"))%>%
  subset(Percentile_Cutoff %in% c(0,0.25,0.5,0.75)) %>%
  tidyr::drop_na()

Beta_df_DOMAIN_Trends_SM_Coherence <- Beta_df_DOMAIN %>%
  dplyr::group_by(Domain,Coefficients,Year,Country_Inclusion_Criteria,Percentile_Cutoff,Citation_Type,Citation_Window,Journal_Censoring,Model_Type) %>%
  dplyr::summarise(Beta_Mean = mean(Beta,na.rm=T),
                   Beta_SE = Grand_SE(Beta/Test_Statistics)) %>%
  as.data.frame() %>%
  dplyr::rename("Beta"="Beta_Mean") %>%
  subset(Coefficients!="Intercept") %>%
  dplyr::mutate(Percentile_Cutoff_Labels = case_when(Percentile_Cutoff==0 ~ "All Inclusive",
                                                     Percentile_Cutoff==0.25 ~ "Above 25th\nPercentile",
                                                     Percentile_Cutoff==0.5 ~ "Above 50th\nPercentile",
                                                     Percentile_Cutoff==0.75 ~ "Above 75th\nPercentile"))


Beta_df_DOMAIN_Percentile_SM_Coherence <- Beta_df_DOMAIN %>%
  subset(Percentile_Cutoff!=Percentile_Cutoff_Criterion_) %>%
  subset(select=-c(Percentile_Cutoff)) %>%
  subset(Citation_Type==Inflation_Criterion_) %>%
  subset(Citation_Window==Citation_Window_Criterion_)%>%
  subset(Journal_Censoring==Journal_Censoring_Criterion_)%>%
  subset(Model_Type=="Citations_ji_and_NumDiffijPapers")%>%
  subset(Country_Inclusion_Criteria!='All_wo_Core')%>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = factor(Country_Inclusion_Criteria,levels=c("Core+Periphery","Core")))%>%
  as.data.frame()

Beta_df_DOMAIN_Trends_Percentile_SM_Coherence <- Beta_df_DOMAIN_Trends_SM_Coherence %>%
  subset(Percentile_Cutoff!=Percentile_Cutoff_Criterion_) %>%
  subset(select=-c(Percentile_Cutoff)) %>%
  subset(Citation_Type==Inflation_Criterion_) %>%
  subset(Citation_Window==Citation_Window_Criterion_)%>%
  subset(Journal_Censoring==Journal_Censoring_Criterion_)%>%
  subset(Model_Type=="Citations_ji_and_NumDiffijPapers")%>%
  subset(Country_Inclusion_Criteria!='All_wo_Core')%>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = factor(Country_Inclusion_Criteria,levels=c("Core+Periphery","Core")))%>%
  as.data.frame()


### Coherence Cutoffs | Beta Domain | Plot ############################# 

SM_Coherence_Cutoffs_Beta_Coefficients_DOMAIN <- ggplot(data=subset(Beta_df_DOMAIN_Percentile_SM_Coherence,
                                                      Year>=minimum_year & Year<=maximum_year),
                                          aes(x=Year,y=Beta,colour=Country_Inclusion_Criteria))+
  #geom_point(aes(group=Discipline),alpha=0.02)+
  #geom_line(aes(group=Discipline),alpha=0.02)+
  #geom_errorbar(aes(ymin=Beta-(Beta/Test_Statistics),
  #                  ymax=Beta+(Beta/Test_Statistics),
  #                  group=Discipline),alpha=0.02)+
  
  #### Grand Trends
  geom_point(data=subset(Beta_df_DOMAIN_Trends_Percentile_SM_Coherence,Year>=minimum_year & Year<=maximum_year ),
             aes(group=Country_Inclusion_Criteria))+
  geom_line(data=subset(Beta_df_DOMAIN_Trends_Percentile_SM_Coherence,Year>=minimum_year & Year<=maximum_year ),
            aes(group=Country_Inclusion_Criteria))+
  geom_ribbon(data=subset(Beta_df_DOMAIN_Trends_Percentile_SM_Coherence,Year>=minimum_year & Year<=maximum_year),
              aes(ymin=Beta-Beta_SE,
                  ymax=Beta+Beta_SE,
                  fill=Country_Inclusion_Criteria,
                  color=Country_Inclusion_Criteria),
              alpha=0.2)+
  # ggrepel::geom_label_repel(data=subset(Beta_df_DOMAIN_Trends_Percentile_SM_Coherence,
  #                                       Year==2012),
  #                           aes(fill=Country_Inclusion_Criteria,label=Country_Inclusion_Criteria,size=5),
  #                           fontface = 'bold', color = 'white',show.legend = F)+
  theme_bw() + 
  #geom_vline(xintercept = 2005,color="red",linetype="dashed")+
  ylab("Beta Coefficient\n(Standard Deviations)")+
  xlab("Years")+
  #ylim(c(0.1,0.4))+
  theme(text = element_text(size=16),
        panel.border = element_blank(), 
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black")) +
  theme(legend.position="bottom")+
  theme(strip.background = element_blank(), strip.placement = "outside")+
  theme(legend.title=element_blank())+
  theme(legend.background = element_rect(colour = "black"))+
  ggtitle("")+
  theme(plot.title = element_text(hjust = 0.5))+
  ggsci::scale_color_npg()+
  ggsci::scale_fill_npg()+
  facet_grid(Percentile_Cutoff_Labels~Domain)


# Deflated vs. Standard ---------------------------------

# Deflated vs. Standard | Beta  =================================

Beta_df_Trends_SM_Deflated <- Beta_df %>%
  dplyr::group_by(Coefficients,Year,Country_Inclusion_Criteria,Percentile_Cutoff,Citation_Type,Citation_Window,Journal_Censoring,Model_Type) %>%
  dplyr::summarise(Beta_Mean = mean(Beta,na.rm=T),
                   Beta_SE = Grand_SE(Beta/Test_Statistics)) %>%
  as.data.frame() %>%
  dplyr::rename("Beta"="Beta_Mean") %>%
  subset(Coefficients!="Intercept") %>%
  dplyr::mutate(Percentile_Cutoff_Labels = case_when(Percentile_Cutoff==0 ~ "All Inclusive",
                                                     Percentile_Cutoff==0.25 ~ "Above 25th\nPercentile",
                                                     Percentile_Cutoff==0.5 ~ "Above 50th\nPercentile",
                                                     Percentile_Cutoff==0.75 ~ "Above 75th\nPercentile")) %>%
  dplyr::mutate(Citation_Type_Label = case_when(Citation_Type=="No_Censoring"~"No Deflation",
                                            Citation_Type=="Field_Deflated"~"Field Deflated",
                                            Citation_Type=="Dyad_Deflated"~"Receiver Deflated"))


Beta_df_Percentile_SM_Deflated <- Beta_df %>%
  subset(Percentile_Cutoff==Percentile_Cutoff_Criterion_) %>%
  subset(select=-c(Percentile_Cutoff)) %>%
  subset(Citation_Window==Citation_Window_Criterion_)%>%
  subset(Journal_Censoring==Journal_Censoring_Criterion_)%>%
  subset(Model_Type=="Citations_ji_and_NumDiffijPapers")%>%
  subset(Country_Inclusion_Criteria!='All_wo_Core')%>%
  dplyr::mutate(Citation_Type_Label = case_when(Citation_Type=="No_Censoring"~"No Deflation",
                                                Citation_Type=="Field_Deflated"~"Field Deflated",
                                                Citation_Type=="Dyad_Deflated"~"Receiver Deflated"))%>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = factor(Country_Inclusion_Criteria,levels=c("Core+Periphery","Core")))%>%
  as.data.frame()


Beta_df_Trends_Percentile_SM_Deflated <- Beta_df_Trends_SM_Coherence %>%
  subset(Percentile_Cutoff==Percentile_Cutoff_Criterion_) %>%
  subset(select=-c(Percentile_Cutoff)) %>%
  subset(Citation_Window==Citation_Window_Criterion_)%>%
  subset(Journal_Censoring==Journal_Censoring_Criterion_)%>%
  subset(Model_Type=="Citations_ji_and_NumDiffijPapers")%>%
  subset(Country_Inclusion_Criteria!='All_wo_Core')%>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = factor(Country_Inclusion_Criteria,levels=c("Core+Periphery","Core")))%>%
  dplyr::mutate(Citation_Type_Label = case_when(Citation_Type=="No_Censoring"~"No Deflation",
                                                Citation_Type=="Field_Deflated"~"Field Deflated",
                                                Citation_Type=="Dyad_Deflated"~"Receiver Deflated"))%>%
  as.data.frame()

### Deflated vs. Standard | Beta | Plot ############################# 

SM_Deflated_Beta_Coefficients <- ggplot(data=Beta_df_Percentile_SM_Deflated,
                                                 aes(x=Year,
                                                     y=Beta,
                                                     colour=Country_Inclusion_Criteria))+
  # geom_point(aes(group=Discipline),alpha=0.02)+
  # geom_line(aes(group=Discipline),alpha=0.02)+
  # geom_errorbar(aes(ymin=Beta-(Beta/Test_Statistics),
  #                   ymax=Beta+(Beta/Test_Statistics),
  #                   group=Discipline),alpha=0.02)+
  
  #### Grand Trends
  geom_point(data=Beta_df_Trends_Percentile_SM_Deflated,
             aes(group=Country_Inclusion_Criteria))+
  geom_line(data=Beta_df_Trends_Percentile_SM_Deflated,
            aes(group=Country_Inclusion_Criteria))+
  geom_ribbon(data=Beta_df_Trends_Percentile_SM_Deflated,
              aes(ymin=Beta-Beta_SE,
                  ymax=Beta+Beta_SE,
                  fill=Country_Inclusion_Criteria,
                  color=Country_Inclusion_Criteria),
              alpha=0.2)+
  # ggrepel::geom_label_repel(data=subset(Beta_df_Trends_Percentile_SM_Deflated,
  #                                       Year==2012),
  #                           aes(fill=Country_Inclusion_Criteria,label=Country_Inclusion_Criteria,size=5),
  #                           fontface = 'bold', color = 'white',show.legend = F)+
  theme_bw() + 
  #geom_vline(xintercept = 2005,color="red",linetype="dashed")+
  ylab("Beta Coefficient\n(Standard Deviations)")+
  xlab("Years")+
  #ylim(c(0.15,0.35))+
  theme(text = element_text(size=16),
        panel.border = element_blank(), 
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black")) +
  theme(legend.position="bottom")+
  theme(strip.background = element_blank(), strip.placement = "outside")+
  theme(legend.title=element_blank())+
  theme(legend.background = element_rect(colour = "black"))+
  ggtitle("")+
  theme(plot.title = element_text(hjust = 0.5))+
  ggsci::scale_color_npg()+   
  ggsci::scale_fill_npg()+
  facet_wrap(~Citation_Type_Label)

# Deflated vs. Standard | Beta Domain =================================

Beta_df_DOMAIN_SM_Deflated <- plyr::ldply(list.files(pattern="OUTPUT_Python_Field_MRQAP_Beta_KLD_Citation_Corpus_RAKE_and_GoogleAPI_EnglishOnly_*",
                                         full.names=T), 
                              read.csv)%>%
  subset(Stars!="" & 
           Year>=minimum_year & 
           Year<=(maximum_year-Citation_Window_Criterion_)) %>%
  merge(.,subset(MAG_Field_ID_df,select=c("FieldID","Domain")),by.x=c("Discipline"),by.y=c("FieldID"),how="left") %>%
  subset(Coefficients!="Intercept") %>%
  as.data.frame() %>%
  subset(Coefficients%in%c("Citations_ji"))%>%
  subset(Percentile_Cutoff %in% c(0,0.25,0.5,0.75)) %>%
  tidyr::drop_na()

Beta_df_DOMAIN_Trends_SM_Deflated <- Beta_df_DOMAIN_SM_Deflated %>%
  dplyr::group_by(Domain,Coefficients,Year,Country_Inclusion_Criteria,Percentile_Cutoff,Citation_Type,Citation_Window,Journal_Censoring,Model_Type) %>%
  dplyr::summarise(Beta_Mean = mean(Beta,na.rm=T),
                   Beta_SE = Grand_SE(Beta/Test_Statistics)) %>%
  as.data.frame() %>%
  dplyr::rename("Beta"="Beta_Mean") %>%
  subset(Coefficients!="Intercept") %>%
  dplyr::mutate(Percentile_Cutoff_Labels = case_when(Percentile_Cutoff==0 ~ "All Inclusive",
                                                     Percentile_Cutoff==0.25 ~ "Above 25th\nPercentile",
                                                     Percentile_Cutoff==0.5 ~ "Above 50th\nPercentile",
                                                     Percentile_Cutoff==0.75 ~ "Above 75th\nPercentile"))%>%
  dplyr::mutate(Citation_Type_Label = case_when(Citation_Type=="No_Censoring"~"No Deflation",
                                                Citation_Type=="Field_Deflated"~"Field Deflated",
                                                Citation_Type=="Dyad_Deflated"~"Receiver Deflated"))



Beta_df_DOMAIN_Percentile_SM_Deflated <- Beta_df_DOMAIN_SM_Deflated %>%
  subset(Percentile_Cutoff==Percentile_Cutoff_Criterion_) %>%
  subset(select=-c(Percentile_Cutoff)) %>%
  subset(Citation_Window==Citation_Window_Criterion_)%>%
  subset(Journal_Censoring==Journal_Censoring_Criterion_)%>%
  subset(Model_Type=="Citations_ji_and_NumDiffijPapers")%>%
  subset(Country_Inclusion_Criteria!='All_wo_Core')%>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = factor(Country_Inclusion_Criteria,levels=c("Core+Periphery","Core")))%>%
  dplyr::mutate(Citation_Type_Label = case_when(Citation_Type=="No_Censoring"~"No Deflation",
                                                Citation_Type=="Field_Deflated"~"Field Deflated",
                                                Citation_Type=="Dyad_Deflated"~"Receiver Deflated"))%>%
  as.data.frame()

Beta_df_DOMAIN_Trends_Percentile_SM_Deflated <- Beta_df_DOMAIN_Trends_SM_Deflated %>%
  subset(Percentile_Cutoff==Percentile_Cutoff_Criterion_) %>%
  subset(select=-c(Percentile_Cutoff)) %>%
  subset(Citation_Window==Citation_Window_Criterion_)%>%
  subset(Journal_Censoring==Journal_Censoring_Criterion_)%>%
  subset(Model_Type=="Citations_ji_and_NumDiffijPapers")%>%
  subset(Country_Inclusion_Criteria!='All_wo_Core')%>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = factor(Country_Inclusion_Criteria,levels=c("Core+Periphery","Core")))%>%
  dplyr::mutate(Citation_Type_Label = case_when(Citation_Type=="No_Censoring"~"No Deflation",
                                                Citation_Type=="Field_Deflated"~"Field Deflated",
                                                Citation_Type=="Dyad_Deflated"~"Receiver Deflated"))%>%
  as.data.frame()

### Deflated vs. Standard | Beta Domain | Plot ############################# 

SM_Deflated_Beta_Coefficients_DOMAIN <- ggplot(data=subset(Beta_df_DOMAIN_Percentile_SM_Deflated,
                                                                    Year>=minimum_year & Year<=maximum_year),
                                                        aes(x=Year,y=Beta,colour=Country_Inclusion_Criteria))+
  # geom_point(aes(group=Discipline),alpha=0.02)+
  # geom_line(aes(group=Discipline),alpha=0.02)+
  # geom_errorbar(aes(ymin=Beta-(Beta/Test_Statistics),
  #                   ymax=Beta+(Beta/Test_Statistics),
  #                   group=Discipline),alpha=0.02)+
  
  #### Grand Trends
  geom_point(data=subset(Beta_df_DOMAIN_Trends_Percentile_SM_Deflated,Year>=minimum_year & Year<=maximum_year ),
             aes(group=Country_Inclusion_Criteria))+
  geom_line(data=subset(Beta_df_DOMAIN_Trends_Percentile_SM_Deflated,Year>=minimum_year & Year<=maximum_year ),
            aes(group=Country_Inclusion_Criteria))+
  geom_ribbon(data=subset(Beta_df_DOMAIN_Trends_Percentile_SM_Deflated,Year>=minimum_year & Year<=maximum_year),
              aes(ymin=Beta-Beta_SE,
                  ymax=Beta+Beta_SE,
                  fill=Country_Inclusion_Criteria,
                  color=Country_Inclusion_Criteria),
              alpha=0.2)+
  # ggrepel::geom_label_repel(data=subset(Beta_df_DOMAIN_Trends_Percentile_SM_Deflated,
  #                                       Year==2012),
  #                           aes(fill=Country_Inclusion_Criteria,label=Country_Inclusion_Criteria,size=5),
  #                           fontface = 'bold', color = 'white',show.legend = F)+
  theme_bw() + 
  #geom_vline(xintercept = 2005,color="red",linetype="dashed")+
  ylab("Beta Coefficient\n(Standard Deviations)")+
  xlab("Years")+
  #ylim(c(0.1,0.4))+
  theme(text = element_text(size=16),
        panel.border = element_blank(), 
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black")) +
  theme(legend.position="bottom")+
  theme(strip.background = element_blank(), strip.placement = "outside")+
  theme(legend.title=element_blank())+
  theme(legend.background = element_rect(colour = "black"))+
  ggtitle("")+
  theme(plot.title = element_text(hjust = 0.5))+
  ggsci::scale_color_npg()+   
  ggsci::scale_fill_npg()+
  facet_grid(Citation_Type_Label~Domain)


# Core vs. Periphery vs. All ---------------------------------

# Core vs. Periphery vs. All | Beta  =================================

Beta_df_Trends_SM_CorePeriphery <- Beta_df %>%
  dplyr::group_by(Coefficients,Year,Country_Inclusion_Criteria,Percentile_Cutoff,Citation_Type,Citation_Window,Journal_Censoring,Model_Type) %>%
  dplyr::summarise(Beta_Mean = mean(Beta,na.rm=T),
                   Beta_SE = Grand_SE(Beta/Test_Statistics)) %>%
  as.data.frame() %>%
  dplyr::rename("Beta"="Beta_Mean") %>%
  subset(Coefficients!="Intercept") %>%
  dplyr::mutate(Percentile_Cutoff_Labels = case_when(Percentile_Cutoff==0 ~ "All Inclusive",
                                                     Percentile_Cutoff==0.25 ~ "Above 25th\nPercentile",
                                                     Percentile_Cutoff==0.5 ~ "Above 50th\nPercentile",
                                                     Percentile_Cutoff==0.75 ~ "Above 75th\nPercentile"))


Beta_df_Percentile_SM_CorePeriphery <- Beta_df %>%
  subset(Percentile_Cutoff==Percentile_Cutoff_Criterion_) %>%
  subset(select=-c(Percentile_Cutoff)) %>%
  subset(Citation_Type==Inflation_Criterion_) %>%
  subset(Citation_Window==Citation_Window_Criterion_)%>%
  subset(Journal_Censoring==Journal_Censoring_Criterion_)%>%
  subset(Model_Type=="Citations_ji_and_NumDiffijPapers")%>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All"~"Core+Periphery",
                                                       as.character(Country_Inclusion_Criteria)=="All_w_Core"~"Core+Periphery",
                                                       as.character(Country_Inclusion_Criteria)=="All_wo_Core"~"Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = factor(Country_Inclusion_Criteria,levels=c("Core+Periphery","Core","Periphery")))%>%
  as.data.frame()

Beta_df_Trends_Percentile_SM_CorePeriphery <- Beta_df_Trends_SM_CorePeriphery %>%
  subset(Percentile_Cutoff==Percentile_Cutoff_Criterion_) %>%
  subset(select=-c(Percentile_Cutoff)) %>%
  subset(Citation_Type==Inflation_Criterion_) %>%
  subset(Citation_Window==Citation_Window_Criterion_)%>%
  subset(Journal_Censoring==Journal_Censoring_Criterion_)%>%
  subset(Model_Type=="Citations_ji_and_NumDiffijPapers")%>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All"~"Core+Periphery",
                                                       as.character(Country_Inclusion_Criteria)=="All_w_Core"~"Core+Periphery",
                                                       as.character(Country_Inclusion_Criteria)=="All_wo_Core"~"Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = factor(Country_Inclusion_Criteria,levels=c("Core+Periphery","Core","Periphery")))%>%
  as.data.frame()

### Core vs. Periphery vs. All | Beta | Plot ############################# 

SM_CorePeriphery_Beta_Coefficients <- ggplot(data=Beta_df_Percentile_SM_CorePeriphery,
                                             aes(x=Year,
                                                 y=Beta,
                                                 colour=Country_Inclusion_Criteria))+

  #### Grand Trends
  geom_point(data=Beta_df_Trends_Percentile_SM_CorePeriphery,
             aes(group=Country_Inclusion_Criteria))+
  geom_line(data=Beta_df_Trends_Percentile_SM_CorePeriphery,
            aes(group=Country_Inclusion_Criteria))+
  geom_ribbon(data=Beta_df_Trends_Percentile_SM_CorePeriphery,
              aes(ymin=Beta-Beta_SE,
                  ymax=Beta+Beta_SE,
                  fill=Country_Inclusion_Criteria,
                  color=Country_Inclusion_Criteria),
              alpha=0.2)+
  # ggrepel::geom_label_repel(data=subset(Beta_df_Trends_Percentile_SM_CorePeriphery,
  #                                       Year==2012),
  #                           aes(fill=Country_Inclusion_Criteria,label=Country_Inclusion_Criteria,size=5),
  #                           fontface = 'bold', color = 'white',show.legend = F)+
  theme_bw() + 
  #geom_vline(xintercept = 2005,color="red",linetype="dashed")+
  ylab("Beta Coefficient\n(Standard Deviations)")+
  xlab("Years")+
  #ylim(c(0.05,0.35))+
  theme(text = element_text(size=16),
        panel.border = element_blank(), 
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black")) +
  theme(legend.position="bottom")+
  theme(strip.background = element_blank(), strip.placement = "outside")+
  theme(legend.title=element_blank())+
  theme(legend.background = element_rect(colour = "black"))+
  ggtitle("")+
  theme(plot.title = element_text(hjust = 0.5))+
  ggsci::scale_color_npg()+
  ggsci::scale_fill_npg()

# Core vs. Periphery vs. All | Beta Domain  =================================

Beta_df_DOMAIN_Trends_SM_CorePeriphery <- Beta_df_DOMAIN %>%
  dplyr::group_by(Domain,Coefficients,Year,Country_Inclusion_Criteria,Percentile_Cutoff,Citation_Type,Citation_Window,Journal_Censoring,Model_Type) %>%
  dplyr::summarise(Beta_Mean = mean(Beta,na.rm=T),
                   Beta_SE = Grand_SE(Beta/Test_Statistics)) %>%
  as.data.frame() %>%
  dplyr::rename("Beta"="Beta_Mean") %>%
  subset(Coefficients!="Intercept") %>%
  dplyr::mutate(Percentile_Cutoff_Labels = case_when(Percentile_Cutoff==0 ~ "All Inclusive",
                                                     Percentile_Cutoff==0.25 ~ "Above 25th\nPercentile",
                                                     Percentile_Cutoff==0.5 ~ "Above 50th\nPercentile",
                                                     Percentile_Cutoff==0.75 ~ "Above 75th\nPercentile")) %>%
  subset(Percentile_Cutoff==Percentile_Cutoff_Criterion_) %>%
  subset(select=-c(Percentile_Cutoff)) %>%
  subset(Citation_Type==Inflation_Criterion_) %>%
  subset(Citation_Window==Citation_Window_Criterion_)%>%
  subset(Journal_Censoring==Journal_Censoring_Criterion_)%>%
  subset(Model_Type=="Citations_ji_and_NumDiffijPapers")%>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All"~"Core+Periphery",
                                                       as.character(Country_Inclusion_Criteria)=="All_w_Core"~"Core+Periphery",
                                                       as.character(Country_Inclusion_Criteria)=="All_wo_Core"~"Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = factor(Country_Inclusion_Criteria,levels=c("Core+Periphery","Core","Periphery")))%>%
  as.data.frame()

Beta_df_DOMAIN_Trends_Percentile_SM_CorePeriphery <- Beta_df_DOMAIN %>%
  subset(Percentile_Cutoff==Percentile_Cutoff_Criterion_) %>%
  subset(select=-c(Percentile_Cutoff)) %>%
  subset(Citation_Type==Inflation_Criterion_) %>%
  subset(Citation_Window==Citation_Window_Criterion_)%>%
  subset(Journal_Censoring==Journal_Censoring_Criterion_)%>%
  subset(Model_Type=="Citations_ji_and_NumDiffijPapers")%>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All"~"Core+Periphery",
                                                       as.character(Country_Inclusion_Criteria)=="All_w_Core"~"Core+Periphery",
                                                       as.character(Country_Inclusion_Criteria)=="All_wo_Core"~"Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = factor(Country_Inclusion_Criteria,levels=c("Core+Periphery","Core","Periphery")))%>%
  as.data.frame()

### Core vs. Periphery vs. All | Beta Domain | Plot ############################# 

SM_CorePeriphery_Beta_Coefficients_DOMAIN <- ggplot(data=subset(Beta_df_DOMAIN_Trends_Percentile_SM_CorePeriphery,
                                                                Year>=minimum_year & Year<=maximum_year-5),
                                                    aes(x=Year,y=Beta,colour=Country_Inclusion_Criteria))+

  #### Grand Trends
  geom_point(data=subset(Beta_df_DOMAIN_Trends_SM_CorePeriphery,
                         Year>=minimum_year & Year<=maximum_year-5 ),
             aes(group=Country_Inclusion_Criteria))+
  geom_line(data=subset(Beta_df_DOMAIN_Trends_SM_CorePeriphery,
                        Year>=minimum_year & Year<=maximum_year-5 ),
            aes(group=Country_Inclusion_Criteria))+
  geom_ribbon(data=subset(Beta_df_DOMAIN_Trends_SM_CorePeriphery,
                          Year>=minimum_year & Year<=maximum_year-5),
              aes(ymin=Beta-Beta_SE,
                  ymax=Beta+Beta_SE,
                  fill=Country_Inclusion_Criteria,
                  color=Country_Inclusion_Criteria),
              alpha=0.2)+
  # ggrepel::geom_label_repel(data=subset(Beta_df_DOMAIN_Trends_SM_CorePeriphery,
  #                                       Year==2012),
  #                           aes(fill=Country_Inclusion_Criteria,label=Country_Inclusion_Criteria,size=5),
  #                           fontface = 'bold', color = 'white',show.legend = F)+
  theme_bw() + 
  #geom_vline(xintercept = 2005,color="red",linetype="dashed")+
  ylab("Beta Coefficient\n(Standard Deviations)")+
  xlab("Years")+
  #ylim(c(0.00,0.45))+
  theme(text = element_text(size=16),
        panel.border = element_blank(), 
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black")) +
  theme(legend.position="bottom")+
  theme(strip.background = element_blank(), strip.placement = "outside")+
  theme(legend.title=element_blank())+
  theme(legend.background = element_rect(colour = "black"))+
  ggtitle("")+
  theme(plot.title = element_text(hjust = 0.5))+
  ggsci::scale_color_npg()+   
  ggsci::scale_fill_npg()+
  facet_grid(~Domain)


# English vs. Non-English ---------------------------------

Beta_df_NonEnglish <- plyr::ldply(list.files(pattern="OUTPUT_Python_Field_MRQAP_Beta_KLD_Citation_Corpus_RAKE_and_GoogleAPI_All_*",
                                  full.names=T), 
                       read.csv) %>%
  subset(Stars!="" & 
           Year>=minimum_year & 
           Year<=(maximum_year-Citation_Window_Criterion_)) %>%
  merge(.,subset(MAG_Field_ID_df,select=c("FieldID","Name")),by.x=c("Discipline"),by.y=c("FieldID"),how="left") %>%
  subset(Coefficients!="Intercept") %>%
  as.data.frame() %>%
  subset(Coefficients%in%c("Citations_ji"))%>%
  subset(Percentile_Cutoff %in% c(0,0.25,0.5,0.75)) %>%
  tidyr::drop_na()

# Note - Bind NonEnglish (aka Non-English) from previous data batch with Current Data batch (aka English Only)
Beta_df_SM_Language <- Beta_df_NonEnglish %>%
  dplyr::mutate(Language="All")%>%
  subset(Citation_Type=="Deflated" & Citation_Window==5)%>%
  subset(select=c("Discipline","Coefficients","Beta","Year","Stars","Model_Type","Language","Test_Statistics","Country_Inclusion_Criteria"))%>%
  rbind(.,Beta_df %>% 
          dplyr::mutate(Language="English_Only")%>%
          subset(Percentile_Cutoff==Percentile_Cutoff_Criterion_) %>%
          subset(Citation_Type==Inflation_Criterion_) %>%
          subset(Citation_Window==Citation_Window_Criterion_)%>%
          subset(Journal_Censoring==Journal_Censoring_Criterion_)%>%
          subset(Model_Type=="Citations_ji_and_NumDiffijPapers")%>%
          subset(select=c("Discipline","Coefficients","Beta","Year","Stars","Model_Type","Language","Test_Statistics","Country_Inclusion_Criteria"))) 

Beta_df_DOMAIN_NonEnglish <- plyr::ldply(list.files(pattern="OUTPUT_Python_Field_MRQAP_Beta_KLD_Citation_Corpus_RAKE_and_GoogleAPI_All_*",
                                                    full.names=T), 
                                         read.csv)%>%
  subset(Stars!="" & 
           Year>=minimum_year & 
           Year<=(maximum_year-Citation_Window_Criterion_)) %>%
  merge(.,subset(MAG_Field_ID_df,select=c("FieldID","Domain")),by.x=c("Discipline"),by.y=c("FieldID"),how="left") %>%
  subset(Coefficients!="Intercept") %>%
  as.data.frame() %>%
  subset(Coefficients%in%c("Citations_ji"))%>%
  subset(Percentile_Cutoff %in% c(0,0.25,0.5,0.75)) %>%
  tidyr::drop_na()

Beta_df_DOMAIN_SM_Language <- Beta_df_DOMAIN_NonEnglish %>%
  dplyr::mutate(Language="All")%>%
  subset(Citation_Type=="Deflated" & Citation_Window==5)%>%
  subset(select=c("Domain","Discipline","Coefficients","Beta","Year","Stars","Model_Type","Language","Test_Statistics","Country_Inclusion_Criteria"))%>%
  rbind(.,Beta_df_DOMAIN %>% 
          dplyr::mutate(Language="English_Only")%>%
          subset(Percentile_Cutoff==Percentile_Cutoff_Criterion_) %>%
          subset(Citation_Type==Inflation_Criterion_) %>%
          subset(Citation_Window==Citation_Window_Criterion_)%>%
          subset(Journal_Censoring==Journal_Censoring_Criterion_)%>%
          subset(Model_Type=="Citations_ji_and_NumDiffijPapers")%>%
          subset(select=c("Domain","Discipline","Coefficients","Beta","Year","Stars","Model_Type","Language","Test_Statistics","Country_Inclusion_Criteria"))) 

# English vs. Non-English | Beta  =================================

Beta_df_Trends_SM_Language <- Beta_df_SM_Language %>%
  dplyr::group_by(Coefficients,Year,Country_Inclusion_Criteria,Model_Type,Language) %>%
  dplyr::summarise(Beta_Mean = mean(Beta,na.rm=T),
                   Beta_SE = Grand_SE(Beta/Test_Statistics)) %>%
  as.data.frame() %>%
  dplyr::rename("Beta"="Beta_Mean") %>%
  subset(Coefficients!="Intercept") 


Beta_df_Percentile_SM_Language <- Beta_df_SM_Language %>%
  subset(Country_Inclusion_Criteria!="All_wo_Core") %>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All_w_Core"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = factor(Country_Inclusion_Criteria,levels=c("Core+Periphery","Core")))%>%
  dplyr::mutate(Language = case_when(as.character(Language)=="English_Only"~"English Only",
                                     TRUE~"English and\nTranslated"))%>%
  as.data.frame()

Beta_df_Trends_Percentile_SM_Language <- Beta_df_Trends_SM_Language %>%
  subset(Country_Inclusion_Criteria!="All_wo_Core") %>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All_w_Core"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = factor(Country_Inclusion_Criteria,levels=c("Core+Periphery","Core")))%>%
  dplyr::mutate(Language = case_when(as.character(Language)=="English_Only"~"English Only",
                                     TRUE~"English and\nTranslated"))%>%
  as.data.frame()

### English vs. Non-English | Beta | Plot ############################# 

SM_Language_Beta_Coefficients <- ggplot(data=Beta_df_Percentile_SM_Language,
                                        aes(x=Year,
                                            y=Beta,
                                            colour=Country_Inclusion_Criteria))+
  # geom_point(aes(group=Discipline),alpha=0.02)+
  # geom_line(aes(group=Discipline),alpha=0.02)+
  # geom_errorbar(aes(ymin=Beta-(Beta/Test_Statistics),
  #                   ymax=Beta+(Beta/Test_Statistics),
  #                   group=Discipline),alpha=0.02)+
  
  #### Grand Trends
  geom_point(data=Beta_df_Trends_Percentile_SM_Language,
             aes(group=Country_Inclusion_Criteria))+
  geom_line(data=Beta_df_Trends_Percentile_SM_Language,
            aes(group=Country_Inclusion_Criteria))+
  geom_ribbon(data=Beta_df_Trends_Percentile_SM_Language,
              aes(ymin=Beta-Beta_SE,
                  ymax=Beta+Beta_SE,
                  fill=Country_Inclusion_Criteria,
                  color=Country_Inclusion_Criteria),
              alpha=0.2)+
  # ggrepel::geom_label_repel(data=subset(Beta_df_Trends_Percentile_SM_Language,
  #                                       Year==2012),
  #                           aes(fill=Country_Inclusion_Criteria,label=Country_Inclusion_Criteria,size=5),
  #                           fontface = 'bold', color = 'white',show.legend = F)+
  theme_bw() + 
  #geom_vline(xintercept = 2005,color="red",linetype="dashed")+
  ylab("Beta Coefficient\n(Standard Deviations)")+
  xlab("Years")+
  #ylim(c(0.15,0.35))+
  theme(text = element_text(size=16),
        panel.border = element_blank(), 
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black")) +
  theme(legend.position="bottom")+
  theme(strip.background = element_blank(), strip.placement = "outside")+
  theme(legend.title=element_blank())+
  theme(legend.background = element_rect(colour = "black"))+
  ggtitle("")+
  theme(plot.title = element_text(hjust = 0.5))+
  ggsci::scale_color_npg()+   
  ggsci::scale_fill_npg()+
  facet_wrap(~Language)


# English vs. Non-English | Beta Domain =================================


Beta_df_DOMAIN_Trends_SM_Language <- Beta_df_DOMAIN_SM_Language %>%
  dplyr::group_by(Domain,Coefficients,Year,Country_Inclusion_Criteria,Language) %>%
  dplyr::summarise(Beta_Mean = mean(Beta,na.rm=T),
                   Beta_SE = Grand_SE(Beta/Test_Statistics)) %>%
  as.data.frame() %>%
  dplyr::rename("Beta"="Beta_Mean") %>%
  subset(Coefficients!="Intercept") 


Beta_df_DOMAIN_Percentile_SM_Language <- Beta_df_DOMAIN_SM_Language %>%
  subset(Country_Inclusion_Criteria!="All_wo_Core") %>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All_w_Core"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = factor(Country_Inclusion_Criteria,levels=c("Core+Periphery","Core")))%>%
  dplyr::mutate(Language = case_when(as.character(Language)=="English_Only"~"English Only",
                                     TRUE~"English and\nTranslated"))%>%
  as.data.frame()

Beta_df_DOMAIN_Trends_Percentile_SM_Language <- Beta_df_DOMAIN_Trends_SM_Language %>%
  subset(Country_Inclusion_Criteria!="All_wo_Core") %>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All_w_Core"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = factor(Country_Inclusion_Criteria,levels=c("Core+Periphery","Core")))%>%
  dplyr::mutate(Language = case_when(as.character(Language)=="English_Only"~"English Only",
                                     TRUE~"English and\nTranslated"))%>%
  as.data.frame()

### English vs. Non-English | Beta Domain | Plot ############################# 

SM_Language_Beta_Coefficients_DOMAIN <- ggplot(data=subset(Beta_df_DOMAIN_Percentile_SM_Language,
                                                           Year>=minimum_year & Year<=maximum_year),
                                               aes(x=Year,y=Beta,colour=Country_Inclusion_Criteria))+

  #### Grand Trends
  geom_point(data=subset(Beta_df_DOMAIN_Trends_Percentile_SM_Language,Year>=minimum_year & Year<=maximum_year ),
             aes(group=Country_Inclusion_Criteria))+
  geom_line(data=subset(Beta_df_DOMAIN_Trends_Percentile_SM_Language,Year>=minimum_year & Year<=maximum_year ),
            aes(group=Country_Inclusion_Criteria))+
  geom_ribbon(data=subset(Beta_df_DOMAIN_Trends_Percentile_SM_Language,Year>=minimum_year & Year<=maximum_year),
              aes(ymin=Beta-Beta_SE,
                  ymax=Beta+Beta_SE,
                  fill=Country_Inclusion_Criteria,
                  color=Country_Inclusion_Criteria),
              alpha=0.2)+
  # ggrepel::geom_label_repel(data=subset(Beta_df_DOMAIN_Trends_Percentile_SM_Language,
  #                                       Year==2012),
  #                           aes(fill=Country_Inclusion_Criteria,label=Country_Inclusion_Criteria,size=5),
  #                           fontface = 'bold', color = 'white',show.legend = F)+
  theme_bw() + 
  #geom_vline(xintercept = 2005,color="red",linetype="dashed")+
  ylab("Beta Coefficient\n(Standard Deviations)")+
  xlab("Years")+
  #ylim(c(0.1,0.4))+
  theme(text = element_text(size=16),
        panel.border = element_blank(), 
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black")) +
  theme(legend.position="bottom")+
  theme(strip.background = element_blank(), strip.placement = "outside")+
  theme(legend.title=element_blank())+
  theme(legend.background = element_rect(colour = "black"))+
  ggtitle("")+
  theme(plot.title = element_text(hjust = 0.5))+
  ggsci::scale_color_npg()+   
  ggsci::scale_fill_npg()+
  facet_grid(Language~Domain)


# Journal Censored vs. All Journals =========================
# Journal Censored vs. All Journals | Beta =========================

Beta_df_Trends_SM_JournalCensored <- Beta_df %>%
  dplyr::group_by(Coefficients,Year,Country_Inclusion_Criteria,Percentile_Cutoff,Citation_Type,Citation_Window,Journal_Censoring,Model_Type) %>%
  dplyr::summarise(Beta_Mean = mean(Beta,na.rm=T),
                   Beta_SE = Grand_SE(Beta/Test_Statistics)) %>%
  as.data.frame() %>%
  dplyr::rename("Beta"="Beta_Mean") %>%
  subset(Coefficients!="Intercept") %>%
  dplyr::mutate(Percentile_Cutoff_Labels = case_when(Percentile_Cutoff==0 ~ "All Inclusive",
                                                     Percentile_Cutoff==0.25 ~ "Above 25th\nPercentile",
                                                     Percentile_Cutoff==0.5 ~ "Above 50th\nPercentile",
                                                     Percentile_Cutoff==0.75 ~ "Above 75th\nPercentile")) %>%
  dplyr::mutate(Journal_Censoring_Label = case_when(Journal_Censoring=="No_Censoring"~"No Journal\nCensoring",
                                              TRUE~"Journals Censored\nSince 1980"))


Beta_df_Percentile_SM_JournalCensored <- Beta_df %>%
  subset(Percentile_Cutoff==Percentile_Cutoff_Criterion_) %>%
  subset(select=-c(Percentile_Cutoff)) %>%
  subset(Citation_Type==Inflation_Criterion_) %>%
  subset(Citation_Window==Citation_Window_Criterion_)%>%
  subset(Model_Type=="Citations_ji_and_NumDiffijPapers")%>%
  subset(Country_Inclusion_Criteria!="All_wo_Core") %>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All_w_Core"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = factor(Country_Inclusion_Criteria,levels=c("Core+Periphery","Core")))%>%
   dplyr::mutate(Journal_Censoring_Label = case_when(Journal_Censoring=="No_Censoring"~"No Journal\nCensoring",
                                                    TRUE~"Journals Censored\nSince 1980"))%>%
  as.data.frame()

Beta_df_Trends_Percentile_SM_JournalCensored <- Beta_df_Trends_SM_Coherence %>%
  subset(Percentile_Cutoff==Percentile_Cutoff_Criterion_) %>%
  subset(select=-c(Percentile_Cutoff)) %>%
  subset(Citation_Type==Inflation_Criterion_) %>%
  subset(Citation_Window==Citation_Window_Criterion_)%>%
  subset(Model_Type=="Citations_ji_and_NumDiffijPapers")%>%
  subset(Country_Inclusion_Criteria!="All_wo_Core") %>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All_w_Core"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = factor(Country_Inclusion_Criteria,levels=c("Core+Periphery","Core")))%>%
  dplyr::mutate(Journal_Censoring_Label = case_when(Journal_Censoring=="No_Censoring"~"No Journal\nCensoring",
                                                    TRUE~"Journals Censored\nSince 1980"))%>%
  as.data.frame()

# Journal Censored vs. All Journals | Beta Plot =========================

SM_JournalCensored_Beta_Coefficients <- ggplot(data=Beta_df_Percentile_SM_JournalCensored,
                                        aes(x=Year,
                                            y=Beta,
                                            colour=Country_Inclusion_Criteria))+

  #### Grand Trends
  geom_point(data=Beta_df_Trends_Percentile_SM_JournalCensored,
             aes(group=Country_Inclusion_Criteria))+
  geom_line(data=Beta_df_Trends_Percentile_SM_JournalCensored,
            aes(group=Country_Inclusion_Criteria))+
  geom_ribbon(data=Beta_df_Trends_Percentile_SM_JournalCensored,
              aes(ymin=Beta-Beta_SE,
                  ymax=Beta+Beta_SE,
                  fill=Country_Inclusion_Criteria,
                  color=Country_Inclusion_Criteria),
              alpha=0.2)+
  # ggrepel::geom_label_repel(data=subset(Beta_df_Trends_Percentile_SM_JournalCensored,
  #                                       Year==2012),
  #                           aes(fill=Country_Inclusion_Criteria,label=Country_Inclusion_Criteria,size=5),
  #                           fontface = 'bold', color = 'white',show.legend = F)+
  theme_bw() + 
  #geom_vline(xintercept = 2005,color="red",linetype="dashed")+
  ylab("Beta Coefficient\n(Standard Deviations)")+
  xlab("Years")+
  #ylim(c(0.2,0.5))+
  theme(text = element_text(size=16),
        panel.border = element_blank(), 
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black")) +
  theme(legend.position="bottom")+
  theme(strip.background = element_blank(), strip.placement = "outside")+
  theme(legend.title=element_blank())+
  theme(legend.background = element_rect(colour = "black"))+
  ggtitle("")+
  theme(plot.title = element_text(hjust = 0.5))+
  ggsci::scale_color_npg()+   
  ggsci::scale_fill_npg()+
  facet_wrap(~Journal_Censoring_Label)


# Journal Censored vs. All Journals | Beta Domain =========================

Beta_df_DOMAIN_SM_JournalCensored <- plyr::ldply(list.files(pattern="OUTPUT_Python_Field_MRQAP_Beta_KLD_Citation_Corpus_RAKE_and_GoogleAPI_EnglishOnly_*",
                                                     full.names=T), 
                                          read.csv)%>%
  subset(Stars!="" & 
           Year>=minimum_year & 
           Year<=(maximum_year-Citation_Window_Criterion_)) %>%
  merge(.,subset(MAG_Field_ID_df,select=c("FieldID","Domain")),by.x=c("Discipline"),by.y=c("FieldID"),how="left") %>%
  subset(Coefficients!="Intercept") %>%
  as.data.frame() %>%
  subset(Coefficients%in%c("Citations_ji"))%>%
  subset(Percentile_Cutoff %in% c(0,0.25,0.5,0.75)) %>%
  tidyr::drop_na()

Beta_df_DOMAIN_Trends_SM_JournalCensored <- Beta_df_DOMAIN_SM_JournalCensored %>%
  dplyr::group_by(Domain,Coefficients,Year,Country_Inclusion_Criteria,Percentile_Cutoff,Citation_Type,Citation_Window,Journal_Censoring,Model_Type) %>%
  dplyr::summarise(Beta_Mean = mean(Beta,na.rm=T),
                   Beta_SE = Grand_SE(Beta/Test_Statistics)) %>%
  as.data.frame() %>%
  dplyr::rename("Beta"="Beta_Mean") %>%
  subset(Coefficients!="Intercept") %>%
  dplyr::mutate(Percentile_Cutoff_Labels = case_when(Percentile_Cutoff==0 ~ "All Inclusive",
                                                     Percentile_Cutoff==0.25 ~ "Above 25th\nPercentile",
                                                     Percentile_Cutoff==0.5 ~ "Above 50th\nPercentile",
                                                     Percentile_Cutoff==0.75 ~ "Above 75th\nPercentile"))%>%
  dplyr::mutate(Journal_Censoring_Label = case_when(Journal_Censoring=="No_Censoring"~"No Journal\nCensoring",
                                                    TRUE~"Journals Censored\nSince 1980"))


Beta_df_DOMAIN_Percentile_SM_JournalCensored <- Beta_df_DOMAIN_SM_JournalCensored %>%
  subset(Percentile_Cutoff==Percentile_Cutoff_Criterion_) %>%
  subset(select=-c(Percentile_Cutoff)) %>%
  subset(Citation_Window==Citation_Window_Criterion_)%>%
  subset(Citation_Type==Inflation_Criterion_)%>%
  subset(Model_Type=="Citations_ji_and_NumDiffijPapers")%>%
  subset(Country_Inclusion_Criteria!='All_wo_Core')%>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All_w_Core"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = factor(Country_Inclusion_Criteria,levels=c("Core+Periphery","Core")))%>%
   dplyr::mutate(Journal_Censoring_Label = case_when(Journal_Censoring=="No_Censoring"~"No Journal\nCensoring",
                                                    TRUE~"Journals Censored\nSince 1980"))%>%
  as.data.frame()

Beta_df_DOMAIN_Trends_Percentile_SM_JournalCensored <- Beta_df_DOMAIN_Trends_SM_JournalCensored %>%
  subset(Percentile_Cutoff==Percentile_Cutoff_Criterion_) %>%
  subset(select=-c(Percentile_Cutoff)) %>%
  subset(Citation_Type==Inflation_Criterion_)%>%
  subset(Citation_Window==Citation_Window_Criterion_)%>%
  subset(Model_Type=="Citations_ji_and_NumDiffijPapers")%>%
  subset(Country_Inclusion_Criteria!='All_wo_Core')%>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All_w_Core"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = factor(Country_Inclusion_Criteria,levels=c("Core+Periphery","Core")))%>%
  dplyr::mutate(Journal_Censoring_Label = case_when(Journal_Censoring=="No_Censoring"~"No Journal\nCensoring",
                                                    TRUE~"Journals Censored\nSince 1980"))%>%
  as.data.frame()


# Journal Censored vs. All Journals | Beta Domain Plot =========================

SM_JournalCensored_Beta_Coefficients_DOMAIN <-  ggplot(data=subset(Beta_df_DOMAIN_Percentile_SM_JournalCensored,
                                                                   Year>=minimum_year & Year<=maximum_year),
                                                       aes(x=Year,y=Beta,colour=Country_Inclusion_Criteria))+

  #### Grand Trends
  geom_point(data=subset(Beta_df_DOMAIN_Trends_Percentile_SM_JournalCensored,Year>=minimum_year & Year<=maximum_year ),
             aes(group=Country_Inclusion_Criteria))+
  geom_line(data=subset(Beta_df_DOMAIN_Trends_Percentile_SM_JournalCensored,Year>=minimum_year & Year<=maximum_year ),
            aes(group=Country_Inclusion_Criteria))+
  geom_ribbon(data=subset(Beta_df_DOMAIN_Trends_Percentile_SM_JournalCensored,Year>=minimum_year & Year<=maximum_year),
              aes(ymin=Beta-Beta_SE,
                  ymax=Beta+Beta_SE,
                  fill=Country_Inclusion_Criteria,
                  color=Country_Inclusion_Criteria),
              alpha=0.2)+
  # ggrepel::geom_label_repel(data=subset(Beta_df_DOMAIN_Trends_Percentile_SM_JournalCensored,
  #                                       Year==2012),
  #                           aes(fill=Country_Inclusion_Criteria,label=Country_Inclusion_Criteria,size=5),
  #                           fontface = 'bold', color = 'white',show.legend = F)+
  theme_bw() + 
  #geom_vline(xintercept = 2005,color="red",linetype="dashed")+
  ylab("Beta Coefficient\n(Standard Deviations)")+
  xlab("Years")+
  #ylim(c(0.1,0.55))+
  theme(text = element_text(size=16),
        panel.border = element_blank(), 
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black")) +
  theme(legend.position="bottom")+
  theme(strip.background = element_blank(), strip.placement = "outside")+
  theme(legend.title=element_blank())+
  theme(legend.background = element_rect(colour = "black"))+
  ggtitle("")+
  theme(plot.title = element_text(hjust = 0.5))+
  ggsci::scale_color_npg()+   
  ggsci::scale_fill_npg()+
  facet_grid(Journal_Censoring_Label~Domain)

# All Statistically-Significant and Not Significant Betas  =========================

Beta_df_SM_Significant <- plyr::ldply(list.files(pattern="OUTPUT_Python_Field_MRQAP_Beta_KLD_Citation_Corpus_RAKE_and_GoogleAPI_EnglishOnly_*",
                                  full.names=T), 
                       read.csv) %>%
  
  # 2022 02 20 - Test Without Significance. 
  subset(Year>=minimum_year & 
           Year<=(maximum_year-Citation_Window_Criterion_)) %>%
  
  merge(.,subset(MAG_Field_ID_df,select=c("FieldID","Name")),by.x=c("Discipline"),by.y=c("FieldID"),how="left") %>%
  subset(Coefficients!="Intercept") %>%
  as.data.frame() %>%
  subset(Coefficients%in%c("Citations_ji"))%>%
  subset(Percentile_Cutoff %in% c(0,0.25,0.5,0.75)) %>%
  tidyr::drop_na()

# All Statistically-Significant and Not Significant Betas | Beta  =========================

Beta_df_Trends_SM_Significant <- Beta_df_SM_Significant %>%
  dplyr::group_by(Coefficients,Year,Country_Inclusion_Criteria,Percentile_Cutoff,Citation_Type,Citation_Window,Journal_Censoring,Model_Type) %>%
  dplyr::summarise(Beta_Mean = mean(Beta,na.rm=T),
                   Beta_SE = Grand_SE(Beta/Test_Statistics),
                   N=length(Beta)) %>%
  as.data.frame() %>%
  dplyr::rename("Beta"="Beta_Mean") %>%
  subset(Coefficients!="Intercept") %>%
  dplyr::mutate(Percentile_Cutoff_Labels = case_when(Percentile_Cutoff==0 ~ "All Inclusive",
                                                     Percentile_Cutoff==0.25 ~ "Above 25th\nPercentile",
                                                     Percentile_Cutoff==0.5 ~ "Above 50th\nPercentile",
                                                     Percentile_Cutoff==0.75 ~ "Above 75th\nPercentile"))


Beta_df_Percentile_SM_Significant <- Beta_df_SM_Significant %>%
  subset(Percentile_Cutoff==Percentile_Cutoff_Criterion_) %>%
  subset(select=-c(Percentile_Cutoff)) %>%
  subset(Citation_Type==Inflation_Criterion_) %>%
  subset(Citation_Window==Citation_Window_Criterion_)%>%
  subset(Journal_Censoring==Journal_Censoring_Criterion_)%>%
  subset(Model_Type=="Citations_ji_and_NumDiffijPapers")%>%
  subset(Country_Inclusion_Criteria!='All_wo_Core')%>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = factor(Country_Inclusion_Criteria,levels=c("Core+Periphery","Core")))%>%
  as.data.frame()

Beta_df_SM_Significant_Trends_Percentile <- Beta_df_Trends_SM_Significant %>%
  subset(Percentile_Cutoff==Percentile_Cutoff_Criterion_) %>%
  subset(select=-c(Percentile_Cutoff))%>%
  subset(Model_Type=="Citations_ji_and_NumDiffijPapers")%>%
  subset(Citation_Type==Inflation_Criterion_) %>%
  subset(Country_Inclusion_Criteria!='All_wo_Core')%>%
  subset(Citation_Window==Citation_Window_Criterion_)%>%
  subset(Journal_Censoring==Journal_Censoring_Criterion_)%>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = factor(Country_Inclusion_Criteria,levels=c("Core+Periphery","Core")))%>%
  as.data.frame()

# All Statistically-Significant and Not Significant Betas | Beta Plot  =========================

SM_Significant_Beta_Coefficients <- ggplot(data=Beta_df_Percentile_SM_Significant,
                                   aes(x=Year,
                                       y=Beta,
                                       colour=Country_Inclusion_Criteria))+
  #geom_point(aes(group=Discipline),alpha=0.02)+
  #geom_line(aes(group=Discipline),alpha=0.02)+
  #geom_errorbar(aes(ymin=Beta-(Beta/Test_Statistics),
  #                  ymax=Beta+(Beta/Test_Statistics),
  #                  group=Discipline),alpha=0.02)+
  
  #### Grand Trends
  geom_point(data=Beta_df_SM_Significant_Trends_Percentile,
             aes(group=Country_Inclusion_Criteria))+
  geom_line(data=Beta_df_SM_Significant_Trends_Percentile,
            aes(group=Country_Inclusion_Criteria))+
  geom_ribbon(data=Beta_df_SM_Significant_Trends_Percentile,
              aes(ymin=Beta-Beta_SE,
                  ymax=Beta+Beta_SE,
                  colour=Country_Inclusion_Criteria,
                  fill=Country_Inclusion_Criteria),
              alpha=0.2)+
  # ggrepel::geom_label_repel(data=subset(Beta_df_SM_Significant_Trends_Percentile,
  #                                       Year==2012),
  #                           aes(fill=Country_Inclusion_Criteria,label=Country_Inclusion_Criteria,size=5),
  #                           fontface = 'bold', color = 'white',show.legend = F)+
  theme_bw() + 
  #geom_vline(xintercept = 2005,color="red",linetype="dashed")+
  ylab("Beta Coefficient\n(Standard Deviations)")+
  xlab("Years")+
  #ylim(c(0.2,0.35))+ #Original c(0.15,0.45)
  theme(text = element_text(size=16),
        panel.border = element_blank(), 
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black")) +
  theme(legend.position="bottom")+
  theme(strip.background = element_blank(), strip.placement = "outside")+
  theme(legend.title=element_blank())+
  theme(legend.background = element_rect(colour = "black"))+
  ggtitle("")+
  theme(plot.title = element_text(hjust = 0.5))+
  ggsci::scale_color_npg()+
  ggsci::scale_fill_npg()


# All Statistically-Significant and Not Significant Betas | Beta Domain  =========================

Beta_df_DOMAIN_SM_Significant <- plyr::ldply(list.files(pattern="OUTPUT_Python_Field_MRQAP_Beta_KLD_Citation_Corpus_RAKE_and_GoogleAPI_EnglishOnly_*",
                                         full.names=T), 
                              read.csv)%>%
  subset(Year>=minimum_year & 
           Year<=(maximum_year-Citation_Window_Criterion_)) %>%
  merge(.,subset(MAG_Field_ID_df,select=c("FieldID","Domain")),by.x=c("Discipline"),by.y=c("FieldID"),how="left") %>%
  subset(Coefficients!="Intercept") %>%
  as.data.frame() %>%
  subset(Coefficients%in%c("Citations_ji"))%>%
  subset(Percentile_Cutoff %in% c(0,0.25,0.5,0.75)) %>%
  tidyr::drop_na()

Beta_df_DOMAIN_Trends_SM_Significant <- Beta_df_DOMAIN_SM_Significant %>%
  dplyr::group_by(Domain,Coefficients,Year,Country_Inclusion_Criteria,Percentile_Cutoff,Citation_Window,Citation_Type) %>%
  dplyr::summarise(Beta_Mean = mean(Beta,na.rm=T),
                   Beta_SE = Grand_SE(Beta/Test_Statistics)) %>%
  as.data.frame() %>%
  dplyr::rename("Beta"="Beta_Mean") %>%
  subset(Coefficients!="Intercept") %>%
  dplyr::mutate(Percentile_Cutoff_Labels = case_when(Percentile_Cutoff==0 ~ "All Inclusive",
                                                     Percentile_Cutoff==0.25 ~ "Above 25th\nPercentile",
                                                     Percentile_Cutoff==0.5 ~ "Above 50th\nPercentile",
                                                     Percentile_Cutoff==0.75 ~ "Above 75th\nPercentile"))


Beta_df_DOMAIN_Percentile_SM_Significant <- Beta_df_DOMAIN_SM_Significant %>%
  subset(Percentile_Cutoff==Percentile_Cutoff_Criterion_) %>%
  subset(select=-c(Percentile_Cutoff))%>%
  subset(Country_Inclusion_Criteria!="All_wo_Core") %>%
  subset(Citation_Type==Inflation_Criterion_) %>%
  subset(Citation_Window==Citation_Window_Criterion_)%>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = factor(Country_Inclusion_Criteria,levels=c("Core+Periphery","Core")))%>%
  as.data.frame()

Beta_df_DOMAIN_Trends_Percentile_SM_Significant <- Beta_df_DOMAIN_Trends_SM_Significant %>%
  subset(Percentile_Cutoff==Percentile_Cutoff_Criterion_) %>%
  subset(select=-c(Percentile_Cutoff))%>%
  subset(Country_Inclusion_Criteria!="All_wo_Core") %>%
  subset(Citation_Type==Inflation_Criterion_) %>%
  subset(Citation_Window==Citation_Window_Criterion_)%>%
  dplyr::mutate(Country_Inclusion_Criteria = case_when(as.character(Country_Inclusion_Criteria)=="All"~"Core+Periphery",
                                                       TRUE~as.character(Country_Inclusion_Criteria)))%>%
  dplyr::mutate(Country_Inclusion_Criteria = factor(Country_Inclusion_Criteria,levels=c("Core+Periphery","Core")))%>%
  as.data.frame()

# All Statistically-Significant and Not Significant Betas | Beta Domain Plot  =========================

SM_Significant_Beta_Coefficients_DOMAIN <- ggplot(data=subset(Beta_df_DOMAIN_Percentile_SM_Significant,
                                                      Year>=minimum_year & Year<=maximum_year & Model_Type=="Citations_ji"),
                                          aes(x=Year,y=Beta,colour=Country_Inclusion_Criteria))+
  
  #### Grand Trends
  geom_point(data=subset(Beta_df_DOMAIN_Trends_Percentile_SM_Significant,Year>=minimum_year & Year<=maximum_year ),
             aes(group=Country_Inclusion_Criteria))+
  geom_line(data=subset(Beta_df_DOMAIN_Trends_Percentile_SM_Significant,Year>=minimum_year & Year<=maximum_year ),
            aes(group=Country_Inclusion_Criteria))+
  geom_ribbon(data=subset(Beta_df_DOMAIN_Trends_Percentile_SM_Significant,Year>=minimum_year & Year<=maximum_year),
              aes(ymin=Beta-Beta_SE,
                  ymax=Beta+Beta_SE,
                  fill=Country_Inclusion_Criteria,
                  color=Country_Inclusion_Criteria),
              alpha=0.2)+
  # ggrepel::geom_label_repel(data=subset(Beta_df_DOMAIN_Trends_Percentile_SM_Significant,
  #                                       Year==2012),
  #                           aes(fill=Country_Inclusion_Criteria,label=Country_Inclusion_Criteria,size=5),
  #                           fontface = 'bold', color = 'white',show.legend = F)+
  theme_bw() + 
  ylab("Beta Coefficient\n(Standard Deviations)")+
  xlab("Years")+
  #ylim(c(0.1,0.4))+
  theme(text = element_text(size=16),
        panel.border = element_blank(), 
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black")) +
  theme(legend.position="bottom")+
  theme(strip.background = element_blank(), strip.placement = "outside")+
  theme(legend.title=element_blank())+
  theme(legend.background = element_rect(colour = "black"))+
  ggtitle("")+
  theme(plot.title = element_text(hjust = 0.5))+
  ggsci::scale_color_npg()+
  ggsci::scale_fill_npg()+
  facet_grid(~Domain)

# Print Plots =================================

pdf(paste("SM_Figure_Coherence_Cutoffs_Num_Countries.pdf",sep=""),width=(8*1.25), height=8*1)
SM_Coherence_Cutoffs_Num_Countries
dev.off()

pdf(paste("SM_Figure_Coherence_Cutoffs_Beta_Coefficients.pdf",sep=""),width=(8*1.5), height=8*1)
SM_Coherence_Cutoffs_Beta_Coefficients
dev.off()

pdf(paste("SM_Figure_Coherence_Cutoffs_Beta_Coefficients_DOMAIN.pdf",sep=""),width=(8*1.25), height=8*1.0)
SM_Coherence_Cutoffs_Beta_Coefficients_DOMAIN
dev.off()

pdf(paste("SM_Figure_Deflated_Beta_Coefficients.pdf",sep=""),width=(8*1.75), height=8*1)
SM_Deflated_Beta_Coefficients
dev.off()

pdf(paste("SM_Figure_Deflated_Beta_Coefficients_DOMAIN.pdf",sep=""),width=(8*1.75), height=8*1)
SM_Deflated_Beta_Coefficients_DOMAIN
dev.off()

pdf(paste("SM_Figure_CorePeriphery_Beta_Coefficients.pdf",sep=""),width=(8*1.25), height=8*1.25)
SM_CorePeriphery_Beta_Coefficients
dev.off()

pdf(paste("SM_Figure_CorePeriphery_Beta_Coefficients_DOMAIN.pdf",sep=""),width=(8*1.75), height=8*1.0)
SM_CorePeriphery_Beta_Coefficients_DOMAIN
dev.off()

pdf(paste("SM_Figure_Language_Beta_Coefficients.pdf",sep=""),width=(8*1.25), height=8*1)
SM_Language_Beta_Coefficients
dev.off()

pdf(paste("SM_Figure_Language_Beta_Coefficients_DOMAIN.pdf",sep=""),width=(8*1.25), height=8*1)
SM_Language_Beta_Coefficients_DOMAIN
dev.off()

pdf(paste("SM_Figure_JournalCensored_Beta_Coefficients.pdf",sep=""),width=(8*1.75), height=8*1)
SM_JournalCensored_Beta_Coefficients
dev.off()

pdf(paste("SM_Figure_JournalCensored_Beta_Coefficients_DOMAIN.pdf",sep=""),width=(8*1.5), height=8*1.5)
SM_JournalCensored_Beta_Coefficients_DOMAIN
dev.off()


pdf(paste("SM_Figure_Significant_Beta_Coefficients.pdf",sep=""),width=(8*1.5), height=8*1.5)
SM_Significant_Beta_Coefficients
dev.off()

pdf(paste("SM_Figure_Significant_Beta_Coefficients_DOMAIN.pdf",sep=""),width=(8*1.75), height=8*1.25)
SM_Significant_Beta_Coefficients_DOMAIN
dev.off()
